home *** CD-ROM | disk | FTP | other *** search
- /* CopyBlits
-
- File name: CopyBlits.c
- Function: Optimized CopyBits
- History: 5/26/94 Original by George A. Warner
- History: 10/7/98 Interlace code written
-
- */
-
- #define DO_LOCKING 0
-
- #include "CopyBlits.h"
-
- static CTabHandle gCTabHandle = nil;
- static UInt16 gCTab16[256];
- static UInt32 gCTab32[256];
-
- // implementation
- void Set_ColorTable(CTabHandle pCTabHandle)
- {
- if (gCTabHandle != pCTabHandle)
- {
- gCTabHandle = pCTabHandle;
- if (pCTabHandle)
- {
- if ((*pCTabHandle)->ctSize >= 255)
- {
- short index;
- for (index = 0;index < 256;index++)
- {
- RGBColor tRGBColor = (**pCTabHandle).ctTable[index].rgb;
- gCTab32[index] = ((tRGBColor.red >> 8) << 16) +
- ((tRGBColor.green >> 8) << 8) +
- ((tRGBColor.blue >> 8) << 0);
- gCTab16[index] = ((tRGBColor.red >> 11) << 10) +
- ((tRGBColor.green >> 11) << 5) +
- ((tRGBColor.blue >> 11) << 0);
- }
- }
- }
- }
- }
-
- // Generic routine that uses pixel size to determ which optimized routine to use
- // If you KNOW your depth you should call the approate routine yourself and skip
- // the subroutine/switch overhead.
-
- void CopyBlits ( const PixMapHandle srcPixMapHdl,
- const PixMapHandle dstPixMapHdl,
- const Rect *srcRect,
- const Rect *dstRect
- )
- {
- if ((*srcPixMapHdl)->pixelSize == 8)
- {
- switch ((*dstPixMapHdl)->pixelSize)
- {
- case 8:
- // CopyBlits8(srcPixMapHdl,dstPixMapHdl,srcRect,dstRect);
- CopyBlits8_8(srcPixMapHdl,dstPixMapHdl,srcRect,dstRect);
- break;
- case 16:
- CopyBlits8_16(srcPixMapHdl,dstPixMapHdl,srcRect,dstRect);
- break;
- case 32:
- CopyBlits8_32(srcPixMapHdl,dstPixMapHdl,srcRect,dstRect);
- break;
- default:
- CopyBits((BitMap*) *srcPixMapHdl,(BitMap*) *dstPixMapHdl,srcRect,dstRect,0,nil);
- break;
- }
- }
- else
- CopyBits((BitMap*) *srcPixMapHdl,(BitMap*) *dstPixMapHdl,srcRect,dstRect,0,nil);
- }
-
-
- // This is one of the first non-asm blitters I ever saw. Not as fast as CopyBlits8_8 but I left
- // it in just to show you the skanky nested switch/while code (see Duff's device). ewueee!
-
- // 194 fps vs. 300 fps CopyBits
- void CopyBlits8 ( const PixMapHandle srcPixMapHdl,
- const PixMapHandle dstPixMapHdl,
- const Rect *srcRect,
- const Rect *dstRect
- )
- {
- long dstLeft,dstRight;
- long *srcRow,*dstRow;
- register long *srcPtr,*dstPtr;
- long leftMask,rightMask;
- long notLeftMask,notRightMask;
-
- long srcRowBytes;
- long dstRowBytes;
- long srcLeft;
-
- long dstLong;
- short dstLongs;
- short height;
- long offset;
- long timesCopy;
-
- // This assumes that the pixmap is already locked
- srcRow = (long *) (*srcPixMapHdl)->baseAddr;
- dstRow = (long *) (*dstPixMapHdl)->baseAddr;
-
- // get the bit offset to the src left edge
- srcLeft = (srcRect->left - (*srcPixMapHdl)->bounds.left) * (*srcPixMapHdl)->pixelSize;
-
- // offset the src ptr to the first long
- srcRow += srcLeft >> 5;
-
- // get the bit offset to the dst left and right edges
- dstLeft = (dstRect->left - (*dstPixMapHdl)->bounds.left) * (*dstPixMapHdl)->pixelSize;
- dstRight = (dstRect->right - (*dstPixMapHdl)->bounds.left) * (*dstPixMapHdl)->pixelSize;
-
- // get the number of middle longs to do minus the left edge long
- dstLongs = ((dstRight - dstLeft) >> 5) - 1;
-
- // offset the dst Ptr to the first long
- dstRow += dstLeft >> 5;
-
- // now compute left and right masks for the dst
- dstLeft &= 0x1f;
- leftMask = ( 1 << dstLeft ) - 1;
- notLeftMask = ~leftMask;
-
- dstRight &= 0x1f;
- notRightMask = ( 1 << dstRight ) - 1;
- rightMask = ~notRightMask;
-
- srcRowBytes = (*srcPixMapHdl)->rowBytes & 0x3fff;
- dstRowBytes = (*dstPixMapHdl)->rowBytes & 0x3fff;
-
- // offset the src and dst ptrs to the first row
- offset = (srcRect->top - (*srcPixMapHdl)->bounds.top) * srcRowBytes;
- srcRow = (long*) ((Ptr) srcRow + offset);
-
- offset = (dstRect->top - (*dstPixMapHdl)->bounds.top) * dstRowBytes;
- dstRow = (long*) ((Ptr) dstRow + offset);
-
- /* check if we need to do the left and right mask */
- if ( leftMask )
- {
- if ( notLeftMask == 0 )
- {
- leftMask = 0;
- dstLongs++;
- }
- }
-
- if ( rightMask )
- {
- if ( notRightMask == 0 )
- {
- rightMask = 0;
- dstLongs++;
- }
- }
-
- height = srcRect->bottom - srcRect->top; // No scaling allowed
-
- //for ( ; height >= 0; --height )
- // changing the above 'for()' to the below 'while()' is what made this blit routine
- // faster than CopyBits - a speed improvement equal to all other changes I had made
- // previously. (about 4 to 5 milliseconds, in case you were wondering)
- // Lesson: the true bottlenecks are not always the obvious ones
- while (height--)
- {
- srcPtr = srcRow;
- dstPtr = dstRow;
-
- /* do the masked left edge */
- if ( leftMask )
- {
- dstLong = *srcPtr++ & leftMask;
- dstLong |= *dstPtr & notLeftMask;
- *dstPtr++ = dstLong;
- }
-
- /* do the middle longs with Duff's device */
- timesCopy = (dstLongs + 15) >> 4;
-
- switch( dstLongs & 0xF )
- {
- case 0: do
- { *dstPtr++ = *srcPtr++;
- case 15: *dstPtr++ = *srcPtr++;
- case 14: *dstPtr++ = *srcPtr++;
- case 13: *dstPtr++ = *srcPtr++;
- case 12: *dstPtr++ = *srcPtr++;
- case 11: *dstPtr++ = *srcPtr++;
- case 10: *dstPtr++ = *srcPtr++;
- case 9: *dstPtr++ = *srcPtr++;
- case 8: *dstPtr++ = *srcPtr++;
- case 7: *dstPtr++ = *srcPtr++;
- case 6: *dstPtr++ = *srcPtr++;
- case 5: *dstPtr++ = *srcPtr++;
- case 4: *dstPtr++ = *srcPtr++;
- case 3: *dstPtr++ = *srcPtr++;
- case 2: *dstPtr++ = *srcPtr++;
- case 1: *dstPtr++ = *srcPtr++;
- } while( --timesCopy > 0 );
- }
-
- /* do the masked right edge */
- if ( rightMask )
- {
- dstLong = *srcPtr & rightMask;
- dstLong |= *dstPtr & notRightMask;
- *dstPtr = dstLong;
- }
-
- /* bump to the next row */
- srcRow = (long*) ((Ptr) srcRow + srcRowBytes);
- dstRow = (long*) ((Ptr) dstRow + dstRowBytes);
- }
- }
-
- // my Fastest (non-asm) 8 to 8 bit blitter
-
- // 354 fps vs. 300 fps CopyBits
- void CopyBlits8_8 ( const PixMapHandle srcPixMapHdl,
- const PixMapHandle dstPixMapHdl,
- const Rect *srcRect,
- const Rect *dstRect
- )
- {
- UInt32 doubleCount,lineCount,count;
- UInt8 startFlags, endFlags;
- UInt8 *src, *dst;
- UInt32 srcSkip, dstSkip;
- UInt32 width,height;
-
- width = srcRect->right - srcRect->left;
- height = srcRect->bottom - srcRect->top;
-
- if (!width || !height)
- return;
-
- #if DO_LOCKING
- {
- GWorldFlags srcGWFlags = GetPixelsState(srcPixMapHdl);
- GWorldFlags dstGWFlags = GetPixelsState(dstPixMapHdl);
-
- if (0 == (srcGWFlags & pixelsLocked)) // if source not locked
- if (!LockPixels(srcPixMapHdl)) // and we can't lock it
- return; // give up
-
- if (0 == (dstGWFlags & pixelsLocked)) // if dest not locked
- if (!LockPixels(dstPixMapHdl)) // and we can't lock it
- goto unlock;
- #endif
-
- srcSkip = (*srcPixMapHdl)->rowBytes & 0x3FFF;
- dstSkip = (*dstPixMapHdl)->rowBytes & 0x3FFF;
-
- src = (UInt8*) GetPixBaseAddr(srcPixMapHdl) +
- (srcSkip * (srcRect->top - (*srcPixMapHdl)->bounds.top)) +
- srcRect->left - (*srcPixMapHdl)->bounds.left;
- dst = (UInt8*) GetPixBaseAddr(dstPixMapHdl) +
- (dstSkip * (dstRect->top - (*dstPixMapHdl)->bounds.top)) +
- dstRect->left - (*dstPixMapHdl)->bounds.left;
-
- startFlags = ((UInt8 *) (((UInt32) src + 7U) & ~7U)) - src;
- endFlags = ((UInt32) (src + width)) & 7U;
- doubleCount = ((width - startFlags) - endFlags) / 8;
-
- // pre-fix skip values
- srcSkip -= width;
- dstSkip -= width;
-
- for (lineCount = 0; lineCount < height; lineCount++)
- {
- if (startFlags & 1) // byte align
- *dst++ = *src++;
-
- if (startFlags & 2) // word align
- {
- *(UInt16*) dst = *(UInt16*) src;
- src += 2;
- dst += 2;
- }
-
- if (startFlags & 4) // long align
- {
- *(UInt32*) dst = *(UInt32*) src;
- src += 4;
- dst += 4;
- }
-
- // copy doubles
- for (count = 0; count < doubleCount; count++)
- {
- *(double*) dst = *(double*) src;
- src += 8;
- dst += 8;
- }
-
- // copy remaining long
- if (endFlags & 4)
- {
- *(UInt32*) dst = *(UInt32*) src;
- src += 4;
- dst += 4;
- }
-
- // copy remaining word
- if (endFlags & 2)
- {
- *(UInt16*) dst = *(UInt16*) src;
- src += 2;
- dst += 2;
- }
-
- // copy remaining byte
- if (endFlags & 1)
- *dst++ = *src++;
-
- src += srcSkip;
- dst += dstSkip;
- }
- #if DO_LOCKING
- unlock:
- if (0 == (srcGWFlags & pixelsLocked)) // if source wasn't locked
- UnlockPixels(srcPixMapHdl); // unlock it
- if (0 == (dstGWFlags & pixelsLocked)) // if dest wasn't locked
- UnlockPixels(dstPixMapHdl); // unlock it
- }
- #endif
- }
-
- // my Fastest (non-asm) 8 to 16 bit blitter
-
- // 50 fps vs. 42 fps CopyBits
- void CopyBlits8_16 ( const PixMapHandle srcPixMapHdl,
- const PixMapHandle dstPixMapHdl,
- const Rect *srcRect,
- const Rect *dstRect
- )
- {
- UInt32 count;
- UInt8 *src;
- UInt16 *dst;
- UInt32 srcSkip, dstSkip;
- UInt32 width,height;
-
- Set_ColorTable((**srcPixMapHdl).pmTable);
-
- width = srcRect->right - srcRect->left;
- height = srcRect->bottom - srcRect->top;
-
- if (!width || !height)
- return;
-
- #if DO_LOCKING
- {
- GWorldFlags srcGWFlags = GetPixelsState(srcPixMapHdl);
- GWorldFlags dstGWFlags = GetPixelsState(dstPixMapHdl);
-
- if (0 == (srcGWFlags & pixelsLocked)) // if source not locked
- if (!LockPixels(srcPixMapHdl)) // and we can't lock it
- return; // give up
-
- if (0 == (dstGWFlags & pixelsLocked)) // if dest not locked
- if (!LockPixels(dstPixMapHdl)) // and we can't lock it
- goto unlock;
- #endif
-
- srcSkip = (*srcPixMapHdl)->rowBytes & 0x3FFF;
- dstSkip = (*dstPixMapHdl)->rowBytes & 0x3FFF;
-
- src = (UInt8*) (GetPixBaseAddr(srcPixMapHdl) +
- (srcSkip * (srcRect->top - (*srcPixMapHdl)->bounds.top)) +
- srcRect->left - (*srcPixMapHdl)->bounds.left);
- dst = (UInt16*) (GetPixBaseAddr(dstPixMapHdl) +
- (dstSkip * (dstRect->top - (*dstPixMapHdl)->bounds.top)) +
- ((dstRect->left - (*dstPixMapHdl)->bounds.left) << 1));
-
- // pre-fix skip values
- srcSkip -= width;
- dstSkip -= width << 1;
-
- while (height--)
- {
- for (count = 0; count < width; count++)
- *dst++ = gCTab16[*src++];
- src += srcSkip;
- dst = (UInt16*) (dstSkip + (Ptr) dst);
- }
-
- #if DO_LOCKING
- unlock:
- if (0 == (srcGWFlags & pixelsLocked)) // if source wasn't locked
- UnlockPixels(srcPixMapHdl); // unlock it
- if (0 == (dstGWFlags & pixelsLocked)) // if dest wasn't locked
- UnlockPixels(dstPixMapHdl); // unlock it
- }
- #endif
- }
-
- // my Fastest (non-asm) 8 to 32 bit blitter
-
- // 50 fps vs. 39 fps CopyBits
- void CopyBlits8_32 ( const PixMapHandle srcPixMapHdl,
- const PixMapHandle dstPixMapHdl,
- const Rect *srcRect,
- const Rect *dstRect
- )
- {
- register UInt32 count;
- register UInt8 *src;
- register UInt32 *dst;
- register UInt32 srcSkip, dstSkip;
- register UInt32 width,height;
-
- Set_ColorTable((**srcPixMapHdl).pmTable);
-
- width = srcRect->right - srcRect->left;
- height = srcRect->bottom - srcRect->top;
-
- if (!width || !height)
- return;
-
- #if DO_LOCKING
- {
- GWorldFlags srcGWFlags = GetPixelsState(srcPixMapHdl);
- GWorldFlags dstGWFlags = GetPixelsState(dstPixMapHdl);
-
- if (0 == (srcGWFlags & pixelsLocked)) // if source not locked
- if (!LockPixels(srcPixMapHdl)) // and we can't lock it
- return; // give up
-
- if (0 == (dstGWFlags & pixelsLocked)) // if dest not locked
- if (!LockPixels(dstPixMapHdl)) // and we can't lock it
- goto unlock;
- #endif
-
- srcSkip = (*srcPixMapHdl)->rowBytes & 0x3FFF;
- dstSkip = (*dstPixMapHdl)->rowBytes & 0x3FFF;
-
- src = (UInt8*) (GetPixBaseAddr(srcPixMapHdl) +
- (srcSkip * (srcRect->top - (*srcPixMapHdl)->bounds.top)) +
- srcRect->left - (*srcPixMapHdl)->bounds.left);
- dst = (UInt32*) (GetPixBaseAddr(dstPixMapHdl) +
- (dstSkip * (dstRect->top - (*dstPixMapHdl)->bounds.top)) +
- ((dstRect->left - (*dstPixMapHdl)->bounds.left) << 2));
-
- // pre-fix skip values
- srcSkip -= width;
- dstSkip -= width << 2;
-
- while (height--)
- {
- for (count = 0; count < width; count++)
- *dst++ = gCTab32[*src++];
- src += srcSkip;
- dst = (UInt32*) (dstSkip + (Ptr) dst);
- }
-
- #if DO_LOCKING
- unlock:
- if (0 == (srcGWFlags & pixelsLocked)) // if source wasn't locked
- UnlockPixels(srcPixMapHdl); // unlock it
- if (0 == (dstGWFlags & pixelsLocked)) // if dest wasn't locked
- UnlockPixels(dstPixMapHdl); // unlock it
- }
- #endif
- }
-
- // Ok, here it is. This is how you fake CopyBits into skipping every other line.
- // (I assume that the PixMaps are already locked)
- void CopyBlitsI ( const PixMapHandle srcPixMapHdl,
- const PixMapHandle dstPixMapHdl,
- const Rect *srcRect,
- const Rect *dstRect
- )
- {
- PixMap sPixMap = **srcPixMapHdl;
- PixMap dPixMap = **dstPixMapHdl;
- Rect sRect = *srcRect;
- Rect dRect = *dstRect;
-
- // backup one pixel
- // This prevents QuickDraw from using a blitter that's optimized
- // to go directly to VRAM (which ignores our slammed rowbytes)
- {
- sPixMap.baseAddr -= sPixMap.pixelSize / 8;
- dPixMap.baseAddr -= dPixMap.pixelSize / 8;
- sPixMap.bounds.left--; sPixMap.bounds.right--;
- dPixMap.bounds.left--; dPixMap.bounds.right--;
- }
-
- // double source rowbytes (keep top two flags)
- sPixMap.rowBytes = (sPixMap.rowBytes & 0xC000) |
- ((sPixMap.rowBytes << 1) & 0x3FFF);
-
- // double destination rowbytes (keep top two flags)
- dPixMap.rowBytes = (dPixMap.rowBytes & 0xC000) |
- ((dPixMap.rowBytes << 1) & 0x3FFF);
-
- // half source rect heights
- sRect.bottom = (sRect.bottom - sPixMap.bounds.top) >> 1;
- sRect.top = (sRect.top - sPixMap.bounds.top) >> 1;
-
- sPixMap.bounds.top >>= 1;
- sPixMap.bounds.bottom >>= 1;
-
- sRect.bottom += sPixMap.bounds.top;
- sRect.top += sPixMap.bounds.top;
-
- // half destination rect heights
- dRect.bottom = (dRect.bottom - dPixMap.bounds.top) >> 1;
- dRect.top = (dRect.top - dPixMap.bounds.top) >> 1;
-
- dPixMap.bounds.top >>= 1;
- dPixMap.bounds.bottom >>= 1;
-
- dRect.bottom += dPixMap.bounds.top;
- dRect.top += dPixMap.bounds.top;
-
- // Copy it
- CopyBits((BitMap*) &sPixMap,(BitMap*) &dPixMap,&sRect,&dRect,srcCopy,nil);
- }
-